Load packages

#installation if necessary
#if (!require("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#if (!requireNamespace("devtools", quietly = TRUE)) {
#  install.packages("devtools")
#}

#BiocManager::install("DESeq2")
#BiocManager::install("tximeta")
#install.packages("tidyselect")

#loading in the data
library(tidyverse)
library(datapasta) # allows for copy and pasting
library(DESeq2)
library(SummarizedExperiment)
library(tximeta)
library(dplyr)

#filtering and normalization
#BiocManager::install("vsn")

#library(matrixStats)

#library(hexbin)
#library("vsn")

#multivariate analysis (edgeR)
library(edgeR)
if (!require("DT")) install.packages("DT"); library(DT) # for making interactive tables
if (!require("plotly")) install.packages("plotly"); library(plotly) # for making interactive plots
if (!require("gt")) install.packages("gt"); library(gt) # A layered 'grammar of tables' - think ggplot, but for tables
if (!require("stargazer")) install.packages("stargazer"); library(stargazer)
if (!require("rgl")) install.packages("rgl"); library(rgl)
if (!require("dendextend")) install.packages("dendextend"); library(dendextend) # A way to customize dendrograms to make more complicated visualizations
if (!require("ggpattern")) install.packages("ggpattern"); library(ggpattern)
if (!require("data.table")) install.packages("data.table"); library(data.table)

#making pretty figs
#devtools::install_github("thomasp85/patchwork")
library(patchwork)
library(ggplot2)
library(cowplot)

#Check for necessary packages - example cod for making this prettier
#list.of.packages <- c("ggplot2", 
#                      "tidyr",
#                      "dplyr",
#                      "Hmisc",
#                      "qqplotr",
#                      "ggthemes",
#                      "fabricatr",
#                      "gridExtra",
#                      "grid",
#                      "kableExtra",
#                      "sjPlot",
#                      "cowplot",
#                      "ggpubr",
#                      "patchwork",
#                      "magick", #so you can use the savekable function
#                      "webshot", #so you can use the savekable function
#                      "lme4",
#                      "RcppEigen")

#should install packages that you don't have
#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
#if(length(new.packages)) install.packages(new.packages)

#Load the package that contains the Cox Propotional Hazard Function
#library(")

Clear workspace

rm(list = ls())

Read in data

  • Gene-level analysis

Read in sample metadata

#read in sample metadata
meta <- read.delim("study_design.txt", header = TRUE, as.is = TRUE)

#make the SAMPLE column the rownames
rownames(meta) <- meta$SAMPLE

#add a names column (for lmerSeq later)
meta$names <- meta$SAMPLE

#capture sample labels from the design study file
sampleLabels <- meta$SAMPLE

Import variance-stabilized data

vst_expr <- read_tsv("./variance_stabilized_counts_2025_03_12.tsv")

Import gene annotations

#get annotations for transcripts by loading file from ncbi ---- EDITED TO MATCH ORs ADDED FROM Mitchell's ANNOTATION----
annotation_file <- ("Ppyr_annotations_ORmodified_2025_03_12.tsv")

#Tx <- read_tsv(annotation_file)
Tx <- read_tsv(annotation_file, col_names = c("target_id", "gene_name", "name", "PpyrOR"), col_types = "cccc")

Tx <- as_tibble(Tx)

#transcript ID needs to be the first column in the dataframe
Tx <- dplyr::select(Tx, "target_id", "gene_name")

Multivariate analysis: Checking sources of variation

Multivariate analysis will allow us to explore sources of variation in the data.

We want to explore:

  • Technical sources of variation
    • Does the machine a sample was run on matter?
    • What about the particular run?
    • What about the combo of run and lane?

Note: “run” and “machine” are confounded - only need to use one of these variables

  • Biological sources of variation
    • What about tissue? - we hope that this is a major source of variation
    • What about sex? - we think that this will also be a source of variation
    • What about collection date - do fireflies collected on different dates have different expression?
    • What about preservation date - do fireflies preserved in RNAlater on different dates have different expression?
    • What about the time that samples were preserved - does it matter?
    • What about the combo of date and time?
    • What about the individual? We would expect some differences in baseline among individuals
    • What about if a specimen was first used for SPME before preservation (exposed to CO2)? That could affect its gene expression. - only 1 used for SPME
    • What about how long the specimen was in captivity before preservation? Perhaps longer in captivity would change gene expression profile?
machine <- meta$MACHINE
machine <- factor(machine)

run <- meta$RUN
run <- factor(run)

run_lane <- meta$RUN_LANE_COMBO
run_lane <- factor(run_lane)

tissue <- meta$TISSUE
tissue <- factor(tissue)

sex <- meta$SEX
sex <- factor(sex)

collection_date <- meta$DATE_COLLECTED
collection_date <- factor(collection_date)

preservation_date <- meta$DATE_PRESERVED
preservation_date <- factor(preservation_date)

time_preserved <- as.character(meta$TIME_PRESERVED)
time_preserved <- factor(time_preserved)

date_by_time_preserved <- meta$DATE_BY_TIME_PRESERVED
date_by_time_preserved <- factor(date_by_time_preserved)

individual <- meta$INDIVIDUAL
individual <- factor(individual)

spme <- meta$SPME_USE
spme <- factor(spme)

captivity <- meta$CAPTIVE_DURATION_hrs
captivity <- factor(captivity)

Then, perform some multivariate analyses:

Hierarchichal clustering

#convert vst_expr to a matrix for pca
vst_expr.m <- as.matrix(vst_expr[,1:ncol(vst_expr)-1])


#change sample names to include sex - helps with visualization later
colnames(vst_expr.m) <- c("SEL534ant (M)", "SEL534BL (M)", "SEL535ant (M)", "SEL535BL (M)", "SEL536ant (M)", "SEL536BL (M)", "SEL543ant (M)", "SEL543BL (M)", "SEL562ant (F)",  "SEL562BL (F)", "SEL627ant (F)", "SEL627BL (F)", "SEL672ant (F)", "SEL672BL (F)") 

#hierarchical clustering can only work on a data matrix, not a data frame
distance <- dist(t(vst_expr.m), method = "maximum") #other distance methods are "euclidean", maximum", "manhattan", "canberra", "binary" or "minkowski"
clusters <- hclust(distance, method = "average") #other agglomeration methods are "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", or "centroid"
dend <- as.dendrogram(clusters) # Assign the dendrogram so we can edit it

#pdf("hierarchical_clustering_dendrogram_SEL_2025_01_15.pdf")
plot(clusters, labels=labels(dend))

#dev.off()
  • Clustering doesn’t show major groupings by tissue or sex, as expected. Maybe PCA can help disentangle this.

Principal component analysis (PCA)

Calculate the principal components

pca.res <- prcomp(t(vst_expr.m), scale.=F, retx=T)
#look at the PCA result (pca.res) that you just created

summary(pca.res) # Prints variance summary for all principal components.
## Importance of components:
##                            PC1     PC2      PC3     PC4      PC5      PC6
## Standard deviation     36.2970 18.2412 16.35102 13.8932 13.48014 13.11944
## Proportion of Variance  0.4559  0.1152  0.09252  0.0668  0.06288  0.05956
## Cumulative Proportion   0.4559  0.5711  0.66359  0.7304  0.79327  0.85284
##                             PC7     PC8     PC9    PC10    PC11    PC12    PC13
## Standard deviation     11.95800 9.74443 7.38734 6.58893 5.72460 5.47876 5.15069
## Proportion of Variance  0.04948 0.03286 0.01889 0.01502 0.01134 0.01039 0.00918
## Cumulative Proportion   0.90232 0.93518 0.95407 0.96909 0.98043 0.99082 1.00000
##                             PC14
## Standard deviation     1.597e-13
## Proportion of Variance 0.000e+00
## Cumulative Proportion  1.000e+00
#some useful tools for looking at the data
#pca.res$rotation #$rotation shows you how much each gene influenced each PC (called 'scores')
#pca.res$x # 'x' shows you how much each sample influenced each PC (called 'loadings')
#note that these have a magnitude and a direction (this is the basis for making a PCA plot)

#pdf("screeplot_2025_03_12.pdf")
require(graphics)
#plot(pca.res) #This is the same as screeplot()
screeplot(pca.res) # A screeplot is a standard way to view eigenvalues for each PCA

#dev.off()
  • The screeplot output shows the amount of variance attributed to each principal component (PC). The output shows for the top 10 PCs.
  • From the table,
    • PC1 accounts for 45.6% of the variance in our data.
    • PC2 for 11.5%.
    • PC1 accounts for more than double the variance caused by any other PC.

Visualize the PCA

#get the variance percentages
pc.var <- pca.res$sdev^2 # sdev^2 captures these eigenvalues from the PCA result
pc.per <- round(pc.var/sum(pc.var)*100, 1) # we can then use these eigenvalues to calculate the percentage variance explained by each PC

#get PCA data as a tibble that can graph
pca.res.df <- as_tibble(pca.res$x)
pca.res.df$samplename <- rownames(pca.res$x)

#write_csv(pca.res.df, "PCA_loadings_table_2025_03_12.csv")

PC1 vs PC2

Major experimental factors vs PCs 1 & 2: tissue, sex

#Plot PC1 vs PC2

gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

n = 7
cols = gg_color_hue(n)

ggplot(pca.res.df, aes(x=PC1, y=PC2, fill = tissue, color = individual, shape = sex)) +
  geom_point(size = 4, stroke = 1.5) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
  scale_fill_manual(values = c("gray31", "white"), labels = c("antenna", "leg")) +
  guides(
    fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)), 
    shape = guide_legend(title = "Sex"),
    color = guide_legend(title = "Individual"))

#ggsave("PCA_plot_ind_tissue_sex_2025_03_12.png", dev="png")
  • PC1 correlates with tissue type as the antennae samples and the leg samples congregated to different ends of the plot.
  • PC2 appears to separate sex

Everything else PCs 1 & 2: PC2 = preservation date? -> confounded with sex

f <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = machine)) +
  geom_point(size = 4) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

h <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = run_lane)) +
  geom_point(size = 4) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

i <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = collection_date)) +
  geom_point(size = 4) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

j <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = preservation_date)) +
  geom_point(size = 4) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

k <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = time_preserved)) +
  geom_point(size = 4) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

l <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = date_by_time_preserved)) +
  geom_point(size = 4) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

m <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = spme)) +
  geom_point(size = 4) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

n <- ggplot(pca.res.df, aes(x=PC1, y=PC2, color = captivity)) +
  geom_point(size = 4) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

plot_grid(f, h, i, j, k, l, m, n, labels = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'), label_size = 12, ncol= 2, align = "hv") # plot_grid() is from the cowplot package

  • PC2 looks associated with preservation date (but this is confounded by sex)

PC3 vs PC4

Major experimental factors vs PCs 3 & 4: nothing obvious

#Plot PC3 vs PC4
e <- ggplot(pca.res.df, aes(x=PC3, y=PC4, fill = tissue, color = individual, shape = sex)) +
  geom_point(size = 4, stroke = 1.5) +
  xlab(paste0("PC3 (",pc.per[3],"%",")")) + 
  ylab(paste0("PC4 (",pc.per[4],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
  scale_fill_manual(values = c("gray31", "white"), labels = c("antenna", "leg")) +
  guides(
    fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)), 
    shape = guide_legend(title = "Sex"),
    color = guide_legend(title = "Individual"))

e

  • Individuals are grouped-ish (so properties of the individual included in each)

Everything else PCs 3 & 4: machine?

f <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = machine)) +
  geom_point(size = 4) +
  xlab(paste0("PC3 (",pc.per[3],"%",")")) + 
  ylab(paste0("PC4 (",pc.per[4],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

h <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = run_lane)) +
  geom_point(size = 4) +
  xlab(paste0("PC3 (",pc.per[3],"%",")")) + 
  ylab(paste0("PC4 (",pc.per[4],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

i <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = collection_date)) +
  geom_point(size = 4) +
  xlab(paste0("PC3 (",pc.per[3],"%",")")) + 
  ylab(paste0("PC4 (",pc.per[4],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

j <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = preservation_date)) +
  geom_point(size = 4) +
  xlab(paste0("PC3 (",pc.per[3],"%",")")) + 
  ylab(paste0("PC4 (",pc.per[4],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

k <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = time_preserved)) +
  geom_point(size = 4) +
  xlab(paste0("PC3 (",pc.per[3],"%",")")) + 
  ylab(paste0("PC4 (",pc.per[4],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

l <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = date_by_time_preserved)) +
  geom_point(size = 4) +
  xlab(paste0("PC3 (",pc.per[3],"%",")")) + 
  ylab(paste0("PC4 (",pc.per[4],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

m <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = spme)) +
  geom_point(size = 4) +
  xlab(paste0("PC3 (",pc.per[3],"%",")")) + 
  ylab(paste0("PC4 (",pc.per[4],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

n <- ggplot(pca.res.df, aes(x=PC3, y=PC4, color = captivity)) +
  geom_point(size = 4) +
  xlab(paste0("PC3 (",pc.per[3],"%",")")) + 
  ylab(paste0("PC4 (",pc.per[4],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

plot_grid(f, h, i, j, k, l, m, n, labels = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'), label_size = 12, ncol= 2, align = "hv") # plot_grid() is from the cowplot package

  • PC3 may (?) separate machine (but overlap)

PC5 vs PC6

Major experimental factors vs PCs 5 & 6: nothing obvious

f <- ggplot(pca.res.df, aes(x=PC5, y=PC6, fill = tissue, color = individual, shape = sex)) +
  geom_point(size = 4, stroke = 1.5) +
  xlab(paste0("PC5 (",pc.per[5],"%",")")) + 
  ylab(paste0("PC6 (",pc.per[6],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
  scale_fill_manual(values = c("gray31", "white"), labels = c("antenna", "leg")) +
  guides(
    fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)), 
    shape = guide_legend(title = "Sex"),
    color = guide_legend(title = "Individual"))

f

  • Individuals group together-ish - so properties of individuals

Everything else PCs 5 & 6: nothing obvious

f <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = machine)) +
  geom_point(size = 4) +
  xlab(paste0("PC5 (",pc.per[5],"%",")")) + 
  ylab(paste0("PC6 (",pc.per[6],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

h <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = run_lane)) +
  geom_point(size = 4) +
  xlab(paste0("PC5 (",pc.per[5],"%",")")) + 
  ylab(paste0("PC6 (",pc.per[6],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

i <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = collection_date)) +
  geom_point(size = 4) +
  xlab(paste0("PC5 (",pc.per[5],"%",")")) + 
  ylab(paste0("PC6 (",pc.per[6],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

j <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = preservation_date)) +
  geom_point(size = 4) +
  xlab(paste0("PC5 (",pc.per[5],"%",")")) + 
  ylab(paste0("PC6 (",pc.per[6],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

k <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = time_preserved)) +
  geom_point(size = 4) +
  xlab(paste0("PC5 (",pc.per[5],"%",")")) + 
  ylab(paste0("PC6 (",pc.per[6],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

l <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = date_by_time_preserved)) +
  geom_point(size = 4) +
  xlab(paste0("PC5 (",pc.per[5],"%",")")) + 
  ylab(paste0("PC6 (",pc.per[6],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

m <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = spme)) +
  geom_point(size = 4) +
  xlab(paste0("PC5 (",pc.per[5],"%",")")) + 
  ylab(paste0("PC6 (",pc.per[6],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

n <- ggplot(pca.res.df, aes(x=PC5, y=PC6, color = captivity)) +
  geom_point(size = 4) +
  xlab(paste0("PC5 (",pc.per[5],"%",")")) + 
  ylab(paste0("PC6 (",pc.per[6],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

plot_grid(f, h, i, j, k, l, m, n, labels = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'), label_size = 12, ncol= 2, align = "hv") # plot_grid() is from the cowplot package

  • Nothing obvious

PC7 vs PC8

Major experimental factors vs PCs 7 & 8: nothing obvious

g <- ggplot(pca.res.df, aes(x=PC7, y=PC8, fill = tissue, color = individual, shape = sex)) +
  geom_point(size = 4, stroke = 1.5) +
  xlab(paste0("PC7 (",pc.per[7],"%",")")) + 
  ylab(paste0("PC8 (",pc.per[8],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
  scale_fill_manual(values = c("gray31", "white"), labels = c("antenna", "leg")) +
  guides(
    fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)), 
    shape = guide_legend(title = "Sex"),
    color = guide_legend(title = "Individual"))

g

Everything else PCs 7 & 8: nothing obvious

f <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = machine)) +
  geom_point(size = 4) +
  xlab(paste0("PC7 (",pc.per[7],"%",")")) + 
  ylab(paste0("PC8 (",pc.per[8],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

h <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = run_lane)) +
  geom_point(size = 4) +
  xlab(paste0("PC7 (",pc.per[7],"%",")")) + 
  ylab(paste0("PC8 (",pc.per[8],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

i <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = collection_date)) +
  geom_point(size = 4) +
  xlab(paste0("PC7 (",pc.per[7],"%",")")) + 
  ylab(paste0("PC8 (",pc.per[8],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

j <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = preservation_date)) +
  geom_point(size = 4) +
  xlab(paste0("PC7 (",pc.per[7],"%",")")) + 
  ylab(paste0("PC8 (",pc.per[8],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

k <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = time_preserved)) +
  geom_point(size = 4) +
  xlab(paste0("PC7 (",pc.per[7],"%",")")) + 
  ylab(paste0("PC8 (",pc.per[8],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

l <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = date_by_time_preserved)) +
  geom_point(size = 4) +
  xlab(paste0("PC7 (",pc.per[7],"%",")")) + 
  ylab(paste0("PC8 (",pc.per[8],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

m <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = spme)) +
  geom_point(size = 4) +
  xlab(paste0("PC7 (",pc.per[7],"%",")")) + 
  ylab(paste0("PC8 (",pc.per[8],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

n <- ggplot(pca.res.df, aes(x=PC7, y=PC8, color = captivity)) +
  geom_point(size = 4) +
  xlab(paste0("PC7 (",pc.per[7],"%",")")) + 
  ylab(paste0("PC8 (",pc.per[8],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
   coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(16,17)) +
  scale_alpha_manual(values = c(1, 0.6))

plot_grid(f, h, i, j, k, l, m, n, labels = c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'), label_size = 12, ncol= 2, align = "hv") # plot_grid() is from the cowplot package

  • Nothing obvious

PCA ‘small multiples’ charts

This is another way to view PCA laodings to understand impact of each sample on each principal component

Tissue

pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
              as_tibble() %>%
              add_column(sample = sampleLabels,
                          group = tissue)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()

  • Looking at the loadings, we can say with confidence that PC1 is tissue!

Sex

pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = sex)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()

  • Looking at the loadings, PC2 looks like sex, but is confounded with preservation date.

Machine/Run

#do PCA small multiples for run
#nothing matches for run
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = run)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()


Runxlane combo

#not enough data
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = run_lane)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()


Collection date

#maybe pC4
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = collection_date)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()


Preservation date

  • Maybe some relation to PC2, but not clear
#nothing really
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = preservation_date)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()


Preservation time

#not really enough samples to tell
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = time_preserved)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()


Preservation date x time

#PC2 - seems like all females preserved in the same group!
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = date_by_time_preserved)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()


Individual

  • Some relation to PC3 - but many of the next PCs seem like thy have partial association with individual
#PC3 separates individuals
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = individual)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()


SPME

#there was only 1
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = spme)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()


Captive duration

#hard to say
pca.res.df <- pca.res$x[,1:14] %>% # note that this is the first time you've seen the 'pipe' operator from the magrittr package
  as_tibble() %>%
  add_column(sample = sampleLabels,
             group = captivity)

pca.pivot <- pivot_longer(pca.res.df, # dataframe to be pivoted
                          cols = PC1:PC14, # column names to be stored as a SINGLE variable
                          names_to = "PC", # name of that new variable (column)
                          values_to = "loadings") # name of new variable (column) storing all the values (data)
pca.pivot$PC <- factor(pca.pivot$PC, levels = c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "PC11", "PC12", "PC13", "PC14"))
#Needed to change the PC column to factors so that the order of the small multiples plot is the correct. If did not do this, the order would be alphanumerical.

ggplot(pca.pivot) +
  aes(x=sample, y=loadings, fill=group) + # you could iteratively 'paint' different covariates onto this plot using the 'fill' aes
  geom_bar(stat="identity") +
  facet_wrap(~PC) +
  labs(title="PCA 'small multiples' plot",
       caption=paste0("produced on ", Sys.time())) +
  theme_bw() +
  coord_flip()


Final PCA plots

ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, label=sampleLabels, shape = tissue, group=tissue) +
  geom_point(size=4, alpha=0.5) +
  #geom_label() +
  stat_ellipse() +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
  coord_fixed() +
  theme_bw()

ggplot(pca.res.df) +
  aes(x=PC1, y=PC2, label=sampleLabels, color = sex, group=sex) +
  geom_point(size=4, alpha=0.5) +
  #geom_label() +
  stat_ellipse() +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  labs(title="PCA plot",
       caption=paste0("produced on ", Sys.time())) +
  coord_fixed() +
  theme_bw()

#plot PCs
p <- ggplot(pca.res.df, (aes(x=PC1, y=PC2))) +
  geom_point(aes(fill=tissue, shape=sex), size=3, alpha = 0.6) +
  xlab(paste0("PC1 (",pc.per[1],"%",")")) + 
  ylab(paste0("PC2 (",pc.per[2],"%",")")) +
  coord_fixed() +
  theme_bw() +
  scale_shape_manual(values = c(21,24), labels = c("female", "male")) +
  scale_fill_manual(values=c("black", "white"), labels = c("antenna", "leg")) +
  guides(
    fill = guide_legend(title = "Tissue", override.aes=list(shape = 21)), 
    shape = guide_legend(title = "Sex"))


p

#ggsave("PC1_v_2_tissue_and_sex_2025_03_12.pdf", plot = p, device = "pdf", width = 85, height = 60, dpi = 300, units = "mm")
#ggsave("PC1_v_2_tissue_and_sex_2025_03_12.png", device = "png", width = 85, height = 60, dpi = 300, units = "mm")
  • PC1 = tissue
  • PC2 = sex

Session info

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.7.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] cowplot_1.1.3               patchwork_1.3.0.9000       
##  [3] data.table_1.16.4           ggpattern_1.1.1            
##  [5] dendextend_1.19.0           rgl_1.3.14                 
##  [7] stargazer_5.2.3             gt_0.11.1                  
##  [9] plotly_4.10.4               DT_0.33                    
## [11] edgeR_4.2.2                 limma_3.60.6               
## [13] tximeta_1.22.1              DESeq2_1.44.0              
## [15] SummarizedExperiment_1.34.0 Biobase_2.64.0             
## [17] MatrixGenerics_1.16.0       matrixStats_1.4.1          
## [19] GenomicRanges_1.56.2        GenomeInfoDb_1.40.1        
## [21] IRanges_2.38.1              S4Vectors_0.42.1           
## [23] BiocGenerics_0.50.0         datapasta_3.1.0            
## [25] lubridate_1.9.4             forcats_1.0.0              
## [27] stringr_1.5.1               dplyr_1.1.4                
## [29] purrr_1.0.2                 readr_2.1.5                
## [31] tidyr_1.3.1                 tibble_3.2.1               
## [33] ggplot2_3.5.1               tidyverse_2.0.0            
## 
## loaded via a namespace (and not attached):
##   [1] rstudioapi_0.17.1        jsonlite_1.8.9           tximport_1.32.0         
##   [4] magrittr_2.0.3           GenomicFeatures_1.56.0   farver_2.1.2            
##   [7] rmarkdown_2.29           BiocIO_1.14.0            zlibbioc_1.50.0         
##  [10] vctrs_0.6.5              memoise_2.0.1            Rsamtools_2.20.0        
##  [13] RCurl_1.98-1.16          base64enc_0.1-3          htmltools_0.5.8.1       
##  [16] S4Arrays_1.4.1           progress_1.2.3           AnnotationHub_3.12.0    
##  [19] curl_6.0.1               SparseArray_1.4.8        sass_0.4.9              
##  [22] bslib_0.8.0              htmlwidgets_1.6.4        httr2_1.0.7             
##  [25] cachem_1.1.0             GenomicAlignments_1.40.0 lifecycle_1.0.4         
##  [28] pkgconfig_2.0.3          Matrix_1.7-0             R6_2.5.1                
##  [31] fastmap_1.2.0            GenomeInfoDbData_1.2.12  digest_0.6.37           
##  [34] colorspace_2.1-1         AnnotationDbi_1.66.0     RSQLite_2.3.9           
##  [37] labeling_0.4.3           filelock_1.0.3           timechange_0.3.0        
##  [40] httr_1.4.7               abind_1.4-8              compiler_4.4.1          
##  [43] bit64_4.5.2              withr_3.0.2              BiocParallel_1.38.0     
##  [46] viridis_0.6.5            DBI_1.2.3                biomaRt_2.60.1          
##  [49] MASS_7.3-60.2            rappdirs_0.3.3           DelayedArray_0.30.1     
##  [52] rjson_0.2.23             tools_4.4.1              glue_1.8.0              
##  [55] restfulr_0.0.15          grid_4.4.1               generics_0.1.3          
##  [58] gtable_0.3.5             tzdb_0.4.0               ensembldb_2.28.1        
##  [61] hms_1.1.3                xml2_1.3.6               XVector_0.44.0          
##  [64] BiocVersion_3.19.1       pillar_1.10.1            vroom_1.6.5             
##  [67] BiocFileCache_2.12.0     lattice_0.22-6           rtracklayer_1.64.0      
##  [70] bit_4.5.0.1              tidyselect_1.2.1         locfit_1.5-9.10         
##  [73] Biostrings_2.72.1        knitr_1.49               gridExtra_2.3           
##  [76] ProtGenerics_1.36.0      xfun_0.50                statmod_1.5.0           
##  [79] stringi_1.8.4            UCSC.utils_1.0.0         lazyeval_0.2.2          
##  [82] yaml_2.3.10              evaluate_1.0.3           codetools_0.2-20        
##  [85] BiocManager_1.30.25      cli_3.6.3                munsell_0.5.1           
##  [88] jquerylib_0.1.4          Rcpp_1.0.14              dbplyr_2.5.0            
##  [91] png_0.1-8                XML_3.99-0.17            parallel_4.4.1          
##  [94] blob_1.2.4               prettyunits_1.2.0        AnnotationFilter_1.28.0 
##  [97] bitops_1.0-9             txdbmaker_1.0.1          viridisLite_0.4.2       
## [100] scales_1.3.0             crayon_1.5.3             rlang_1.1.4             
## [103] KEGGREST_1.44.1